********************************************************************************	
********************************************************************************
**	Data Preparation
********************************************************************************	
	
	
********************************************************************************
**	STATION DATA
********************************************************************************		

// 	Load Your Data
	cd "$raw"
	import excel "02_Other_Datasets\Station_Information_20150906.xlsx", sheet("Station_Information_20150906") firstrow clear

//	Keep id, brand, firm  latitude, longitude
	rename name firm 
	rename house_number number 
	rename post_code zip 
	rename place city
	rename lat latitude 
	rename lng longitude 
	keep id firm brand street number zip city latitude longitude
	
//	Destring zip codes & coordinates 
	destring zip, replace force
	destring latitude, replace
	destring longitude, replace
	
//	Renaming and Labeling	
	label var longitude "Longitude GPS Location of Gasoline Station"
	label var latitude "Latitude GPS Location of Gasoline Station"
	label var zip "ID of  Gasoline Station"
	label var id "ID of Gasoline Station"
	label var firm "Name of Gasoline Station"					
	label var brand "Brand of Gasoline Station"
	label var street "Street where Gasoline Station is located"
	label var number "Number where Gasoline Station is located"
	
	label values zip cities
		
//	Replace all capitals in strings by lower case	
	replace firm = lower(firm)
	replace brand = lower(brand)

//	Drop stations that are not activ anymore
	drop if firm == "nicht mehr aktiv"
	drop if brand == "nicht mehr aktiv"
	drop if street == "nicht mehr aktiv"
	drop if city == "nicht mehr aktiv"

//	Replace lat long = 0 with missing	
	replace latitude = . if latitude == 0
	replace longitude =. if longitude == 0
	
//	For two gasoline stations we have a lat long information, just the zip code is missing. 
//	I searched for the right zip code via the internet
	
	replace zip = 35767 if latitude == 50.681301116943359 & longitude == 8.1481218338012695
	replace zip = 57439 if latitude == 51.107856750488281 & longitude == 7.8485908508300781
	replace zip = 12439 if street =="Kȴpenicker Landstraǿe 294"
	replace zip = 07629 if latitude == 50.891387939453125 & longitude == 11.863140106201172
	replace zip = 06406 if latitude == 51.806259155273438 & longitude == 11.693084716796875
	replace zip = 76863 if latitude == 49.150276184082031 & longitude == 8.2328147888183594
	replace zip = 76829 if latitude == 49.207778930664063 & longitude == 8.1358699798583984
	replace zip = 25923 if latitude == 54.86920166015625 & longitude ==8.9062204360961914
	replace zip = 76887 if latitude == 49.102798461914063 & longitude == 8.012080192565918
	
//	Find latitude and longitude information

	replace latitude = 50.8696096	if id == "2e2141a5-910c-47f9-8303-54955776d087"
	replace longitude = 11.8880821 	if id == "2e2141a5-910c-47f9-8303-54955776d087"
	
	replace latitude = 50.2381214	if id == "5bfdb559-cc8b-4496-992d-b5912a159f14"
	replace longitude = 8.8617259 	if id == "5bfdb559-cc8b-4496-992d-b5912a159f14"
	
	replace latitude = 52.4646839	if id == "220661c8-e1f3-4308-8a02-85248dd58d76"
	replace longitude = 13.5005047 	if id == "220661c8-e1f3-4308-8a02-85248dd58d76"
	
	replace latitude = 50.0562046	if id == "95f72263-54d6-4c94-af7d-be4b7dcc9a31"
	replace longitude = 11.6102532 	if id == "95f72263-54d6-4c94-af7d-be4b7dcc9a31"
	
//	The remaining 6 observations don't have enough information to pin down the exact location
	drop if latitude == .
	
//	Some Stations mixed latitude and longitude
	
	gen latitude_help = latitude
	gen longitude_help = longitude
	
	foreach i in 	00060821-0001-4444-8888-acdc00000001 ///
					00060787-0001-4444-8888-acdc00000001 ///
					78e24902-503a-4a88-a3ec-abdc50aaa826 ///
					00060056-0001-4444-8888-acdc00000001  ///
					c1adc487-b268-4e30-9d43-16c84db44fd6 ///
					00061300-0001-4444-8888-acdc00000001 ///
					00060650-0001-4444-8888-acdc00000001 ///
					00060011-0006-4444-8888-acdcffffffff ///
					00060207-0008-4444-8888-acdcffffffff ///
					00060251-0012-4444-8888-acdcffffffff ///
					00060569-0011-4444-8888-acdcffffffff ///
					00060971-0044-4444-8888-acdcffffffff ///
					00061111-0009-4444-8888-acdcffffffff ///
					00061431-0002-4444-8888-acdcffffffff ///
					00060808-0001-4444-8888-acdc00000001 ///
					00060804-0001-4444-8888-acdc00000001 ///
					00060332-0001-4444-8888-acdc00000001 ///
					00060289-0001-4444-8888-acdc00000001 ///
					255ad476-6eed-449e-a668-b43db91045a4 ///
					07de1e3e-0814-43ba-b2c7-e0d39a5e692a ///
					4fdef6f5-6733-4e40-9f3a-ebc1de9871fa ///
					00060224-0002-4444-8888-acdc00000002 ///
					813a3731-ffdd-45a7-96e8-efcd4365f082 ///
					00060752-0001-4444-8888-acdc00000001 ///
					00060333-0001-4444-8888-acdc00000001 ///
					cfbf5103-3555-4cd2-9520-da31f1625c53 {
		replace latitude = longitude_help if id == "`i'"
		replace longitude = latitude_help if id == "`i'"
		}
			
	drop latitude_help longitude_help

// 	Generate brand dummies for...
		
	//	A) Supermarket
		
		gen brand_supermarket = 0
		replace brand_supermarket = 1 if regexm(brand, "supermarkt")
		replace brand_supermarket = 1 if regexm(brand, "edeka")
		replace brand_supermarket = 1 if regexm(brand, "marktkauf")
		replace brand_supermarket = 1 if regexm(brand, "kaufland")
		replace brand_supermarket = 1 if regexm(brand, "e-center")|regexm(brand, "e center")
		replace brand_supermarket = 0 if regexm(brand, "lente center")
		replace brand_supermarket = 1 if regexm(brand, "rewe")
		replace brand_supermarket = 1 if regexm(brand, "real")
		replace brand_supermarket = 1 if regexm(brand, "real")
		replace brand_supermarket = 1 if regexm(brand, "v-markt")
	
	//	B) Bund Freier Tankstellen
		
		gen brand_bft = 0
		replace brand_bft = 1 if regexm(brand, "freie")
		replace brand_bft = 1 if regexm(brand, "frei")
		replace brand_bft = 1 if regexm(brand, "bft")
		replace brand_bft = 1 if regexm(firm, "freie tankstelle")| regexm(firm, "bft")| regexm(firm, "freie sb tankstelle")|regexm(firm, "freie tank")

//	Generate brand

	gen brand_id = 0

	/*	1) Aral 		*/	replace brand_id = 1 if regexm(brand, "aral")
	/*	2) Shell 		*/ 	replace brand_id = 2 if regexm(brand, "shell")
	/*	3) Total		*/	replace brand_id = 3 if regexm(brand, "total")
	/*	4) Esso			*/	replace brand_id = 4 if regexm(brand, "esso")
							replace brand_id = 0 if regexm(firm, "walter dups e.k.")
	/*	5) Avia			*/	replace brand_id = 5 if regexm(brand, "avia")
	/*	6) Jet			*/	replace brand_id = 6 if regexm(brand, "jet")
	/*	7) Raiffeisen	*/	replace brand_id = 7 if regexm(brand, "raiffeisen")
	/*	8) Orlen		*/	replace brand_id = 8 if regexm(brand, "orlen")| regexm(brand, "star")
	/*	9) Agip			*/	replace brand_id = 9 if regexm(brand, "agip")
	/*	10) Hemtamoil	*/	replace brand_id = 10 if regexm(brand, "hem")|regexm(brand, "tamoil")
							replace brand_id = 0 if regexm(brand, "hempelmann")
	/*	11) OMV			*/	replace brand_id = 11 if regexm(brand, "omv")
	/*	12) Westfalen	*/	replace brand_id = 12 if regexm(brand, "westfalen")
							replace brand_id = 7 if regexm(brand, "raiffeisen westfalen nord eg")
	/*	13) Oil!		*/	replace brand_id = 13 if regexm(brand, "oil!")
							replace brand_id = 0 if regexm(firm, "freie tankstelle hildesheim")
	/*	14) Beckmann Q1 */	replace brand_id = 14 if regexm(brand, "q1")
	/*	15) Hoyer		*/	replace brand_id = 15 if regexm(brand, "hoyer")
	/*	16) Baywa		*/	replace brand_id = 16 if regexm(brand, "baywa")| regexm(firm, "baywa")
	/*	17) Lother		*/	replace brand_id = 17 if regexm(brand, "lother")
	/*	18) Classic		*/	replace brand_id = 18 if regexm(brand, "classic")	
	/*	19) HPV			*/	replace brand_id = 19 if regexm(brand, "hpv")
	/*	20) Sprint		*/	replace brand_id = 20 if regexm(brand, "sprint")
	/*	21) Calpam		*/	replace brand_id = 21 if regexm(brand, "calpam")
	/*	22) Score		*/	replace brand_id = 22 if regexm(brand, "score")
	/*	23) SVG			*/	replace brand_id = 23 if regexm(brand, "svg")
	/*	24) Pinoil		*/	replace brand_id = 24 if regexm(brand, "pinoil")
	/*	25) Bavaria Petrol*/replace brand_id = 25 if regexm(brand, "bavaria")
	/*	26) Ellermontan	*/	replace brand_id = 26 if regexm(brand, "eller-montan")	
	/*	28) Famila 		*/	replace brand_id = 27 if regexm(brand, "famila")	
	/*	29) Lomo		*/	replace brand_id = 28 if regexm(brand, "lomo")
	/*	30)	A Energie	*/	replace brand_id = 29 if regexm(brand, "a energie")
	/*	31)	Adap		*/	replace brand_id = 30 if regexm(brand, "adap")
	/*	32)	Ahr			*/	replace brand_id = 31 if regexm(brand, "ahr")
	/*	33) Allguth		*/	replace brand_id = 32 if regexm(brand, "allguth")
	/*	34) Amb			*/	replace brand_id = 33 if regexm(brand, "amb")
	/*	35) Avex		*/	replace brand_id = 34 if regexm(brand, "avex")
	/*	36) Access		*/	replace brand_id = 35 if regexm(brand, "access")
	/*	37) Ahlert		*/	replace brand_id = 36 if regexm(brand, "ahlert")
	/*	38) Argosoil	*/	replace brand_id = 37 if regexm(brand, "argosoil")
	/*	39) AutoweltBadS*/	replace brand_id = 38 if regexm(brand, "autowelt bad saulgau")
	/*	40) Avanti		*/	replace brand_id = 39 if regexm(brand, "avanti")
	/*	41) Bell Oil	*/	replace brand_id = 40 if regexm(brand, "bell oil") 
	/*	42) BK						*/	replace brand_id = 41 if regexm(brand, "bk")
	/*	43) Bunte Mineral Handel	*/	replace brand_id = 42 if regexm(brand, "bunte mineral") 
	/*	44 Bergler					*/	replace brand_id = 43 if regexm(brand, "bergler")
	/*	45) Brachth嶳erv			*/	replace brand_id = 44 if regexm(brand, "brachth") 
	/*	46) Brandol					*/	replace brand_id = 45 if regexm(brand, "brandol") 
	/*	47) Bremer Mineral򬨡ndel	*/	replace brand_id = 46 if regexm(brand, "bremer mineral") 
	/*	48) C+C Markt				*/	replace brand_id = 47 if regexm(brand, "c markt") | regexm(brand, "c oberall")
	/*	49) Citti					*/	replace brand_id = 48 if regexm(brand, "citti")
	/*	50) City Tank		*/	replace brand_id = 49 if regexm(brand, "city tank") 
	/*	51) CleanCar		*/	replace brand_id = 50 if regexm(brand, "cleancar") 
	/*	52) Dietz			*/	replace brand_id = 51 if regexm(brand, "dietz") 
	/*	53) ECO				*/	replace brand_id = 52 if regexm(brand, "eco")
	/*	54) EDI 			*/	replace brand_id = 53 if regexm(brand, "edi")
	/*	55) ED				*/	replace brand_id = 54 if regexm(brand, "ed") & brand_supermarket == 0 & brand_id != 53 
	/*	56) Elan			*/	replace brand_id = 55 if regexm(brand, "elan")
	/*	57) Extrol			*/	replace brand_id = 56 if regexm(brand, "extrol") 
	/*	58) Eberhardt		*/	replace brand_id = 57 if regexm(brand, "eberhardt")
	/*	59) Ektra 			*/	replace brand_id = 58 if regexm(brand, "ektra")
	/*	60) Elf				*/	replace brand_id = 59 if regexm(brand, "elf") 
	/*	61) Felta			*/	replace brand_id = 60 if regexm(brand, "felta") 

//	Generate dummies for A brands: These are Aral, Shell, Esso, Total and Jet (so called oligopolists (Bundeskartellamt 2011 Abschulssbericht Sektoruntersuchung Kraftstoffe)
	
	gen abrand_id = 0
	replace abrand_id = 1 if brand_id == 1			// 	Aral	
	replace abrand_id = 2 if brand_id == 2			//	Shell
	replace abrand_id = 3 if brand_id == 4			//	Esso
	replace abrand_id = 4 if brand_id == 3			//	Total
	replace abrand_id = 5 if brand_id == 6			//	Jet
	
	label var brand_id "Brand of the Gasoline Station"
	label define brands 1 "Aral" 2 "Shell" 3 "Total" 4 "Esso" 5 "Avia" 6 "Jet" 7 "Raiffeisen" 8 "Orlen" 9 "Agip" 10 "Hemtamoil" 11 "Omv" 12 "Westfalen" 13 "Oil!" 14 "Beckmann Q1" 15 "Hoyer" 16 "BayWa" 17 "Lother" 18 "Classic" 19 "Hpv" 20 "Sprint" 21 "Calpam" 22 "Score" 23 "Svg" 24 "Pinoil" 25 "Bavaria Petrol" 26 "Eller-Montan"
	label values brand_id brands
	
	label var brand_supermarket "Dummy for Supermarket Station"
	label var brand_bft "Dummy for Membership in the BFT"

//	Encode firm and brand	
	rename brand v3
	encode v3, gen(brand)
	label var brand "Brand of Gasoline Station"
	drop v3
	
	rename firm v2
	encode v2, gen(firm)
	label var firm "Firm of Gasoline Station"
	drop v2

	sort id 
	order id firm brand street number zip latitude longitude brand_id brand_supermarket brand_bft 
	
//	Manual changes to correct zipcodes
	replace zip = 31174 if zip == 1174
	replace zip = 01239 if zip == 1275
	replace zip = 04838 if zip == 04862
	replace zip = 09557 if zip == 09537
	replace zip = 18273 if zip == 18272 
	replace zip = 24589 if zip == 24599
	replace zip = 24955 if zip == 24952
	replace zip = 25451 if zip == 25442
	replace zip = 25693 if zip == 25772
	replace zip = 26180 if zip == 26170
	replace zip = 27637 if zip == 27639
	replace zip = 32584 if zip == 32484
	replace zip = 32699 if zip == 32695
	replace zip = 33758 if zip == 33578
	replace zip = 34253 if zip == 34523
	replace zip = 36273 if zip == 36273
	replace zip = 38486 if zip == 38496
	replace zip = 49685 if zip == 45685
	replace zip = 51467 if zip == 51247
	replace zip = 54516 if zip == 54519
	replace zip = 56235 if zip == 56224
	replace zip = 59368 if zip == 59386
	replace zip = 63785 if zip == 63778
	replace zip = 69245 if zip == 69243
	replace zip = 73235 if zip == 72335
	replace zip = 84550 if zip == 84450
	replace zip = 84513 if zip == 84512
	replace zip = 86753 if zip == 86763
	replace zip = 91438 if zip == 91425
	replace zip = 94559 if zip == 94595
	replace zip = 99195 if zip == 99095
	replace zip = 25693 if zip == 25722

preserve	
// Load state identifiers from Amtliche Gemeindeschluessel as at 31.12.2013
	cd "$raw"
	import excel "02_Other_Datasets\31122013_Auszug_GV.xlsx", sheet("Gemeindedaten") firstrow clear
	keep Postleitzahl RegionalschlüsselRS
	rename Postleitzahl zip
	rename RegionalschlüsselRS state
	drop if zip == ""
	destring zip , replace
	destring state , replace
	duplicates drop

	// Some ZIP codes are in two states
		drop if zip == 14715 & state == 15
		drop if zip == 17337 & state == 13
		drop if zip == 19273 & state == 13
		drop if zip == 19357 & state == 13
		drop if zip == 37194 & state == 6
		drop if zip == 59969 & state == 6
		drop if zip == 65391 & state == 7
		drop if zip == 69434 & state == 8
			
	tempfile state_id
	save `state_id' , replace
restore
	
	
//	Merge with state ID
	merge m:1 zip using `state_id', keep(match master)

// Fill state IDs that are still missing with ZIP to state matches using OSM data
preserve
	keep if _merge == 1
	drop _merge
	tempfile missing_state
	save `missing_state' , replace

* Load additional zip code-bundesland data from another source:
	cd "$raw"
	import excel "02_Other_Datasets\zuordnung_plz_ort", sheet("Worksheet") firstrow clear

* Keep postal codes + federal state names
keep plz bundesland

* Destring postal codes
destring plz, replace

* Rename
rename plz zip

* Create numeric state id
	gen state = .
	replace state = 1 if bundesland == "Schleswig-Holstein"
	replace state = 2 if bundesland == "Hamburg"
	replace state = 3 if bundesland == "Niedersachsen"
	replace state = 4 if bundesland == "Bremen"
	replace state = 5 if bundesland == "Nordrhein-Westfalen"
	replace state = 6 if bundesland == "Hessen"
	replace state = 7 if bundesland == "Rheinland-Pfalz"
	replace state = 8 if bundesland == "Baden-Württemberg"
	replace state = 9 if bundesland == "Bayern"
	replace state = 10 if bundesland == "Saarland"
	replace state = 11 if bundesland == "Berlin"
	replace state = 12 if bundesland == "Brandenburg"
	replace state = 13 if bundesland == "Mecklenburg-Vorpommern"
	replace state = 14 if bundesland == "Sachsen"
	replace state = 15 if bundesland == "Sachsen-Anhalt"
	replace state = 16 if bundesland == "Thüringen"
	
	keep zip state
	duplicates drop

	* Some ZIP codes are in two states
		drop if zip == 7919  & state == 16
		drop if zip == 14715 & state == 15
		drop if zip == 17337 & state == 13
		drop if zip == 19273 & state == 13
		drop if zip == 19357 & state == 13
		drop if zip == 21039 & state == 1
		drop if zip == 22113 & state == 2
		drop if zip == 22145 & state == 2
		drop if zip == 34355 & state == 6
		drop if zip == 37194 & state == 6
		drop if zip == 59969 & state == 6
		drop if zip == 63928 & state == 8
		drop if zip == 65391 & state == 7
		drop if zip == 69412 & state == 6
		drop if zip == 69434 & state == 8
		drop if zip == 74731 & state == 9
		drop if zip == 89081 & state == 9
		drop if zip == 97896 & state == 9
	
	merge 1:m zip using `missing_state' , keep(match using)
		drop if _merge == 1
		drop _merge

	* Manually fill in state for stations where state remains missing_state
		replace state = 15 if id == "3489fb72-773a-40c6-987e-e8b9c5174347"
		replace state = 15 if id == "61f45d58-3a17-4559-9173-394c86b09596"
		replace state = 15 if id == "e9fe68d3-ed46-428f-9e1c-5d4c6dd25fd2"
		replace state = 12 if id == "e1a15081-251a-9107-e040-0b0a3dfe563c"
		replace state = 1 if id == "005056ba-7cb6-1ed2-bceb-74a6f3fccd25"
		replace state = 3 if id == "02b384ea-65ef-47ac-8762-2b84fda7ee08"
		replace state = 3 if id == "00061002-0548-4444-8888-acdc00000548"
		replace state = 6 if id == "82259e74-6f30-4d3f-a474-ab6143670187"
		replace state = 5 if id == "e2c08e78-acde-4209-9eb3-82086a2169f2"
		replace state = 5 if id == "013bb5ad-a3d1-4bf5-af91-db0086e54eba"
		replace state = 6 if id == "5914a7a9-1553-4cfd-9edd-25f0c0eb29f5"
		replace state = 9 if id == "00060399-0001-4444-8888-acdc00000001"
		replace state = 16 if id == "1065719d-175d-4aa8-ba35-6fc9a449a526"
		replace state = 7 if id == "a21e3ed2-6d03-478f-32a3-9f33202182bc"
		replace state = 5 if id == "62de0ae6-aae6-4fdd-9d16-de5c4adf2075"
		replace state = 1 if id == "cddf9d8f-d025-4078-9c26-717998001773"
		replace state = 7 if id == "e69a923a-ba11-409b-2720-2b908ed66498"
		replace state = 7 if id == "de1e26ff-6fb8-436e-b819-877ab581527a"
		replace state = 6 if id == "6d4bfcc4-44ba-4884-a8b5-72e36dfb7582"
		replace state = 15 if id == "33871888-2235-4a12-8e15-5074a35da9b5"
		replace state = 11 if id == "220661c8-e1f3-4308-8a02-85248dd58d76"
		
	assert state != .
	
	tempfile manual_state
	save `manual_state' , replace
restore

* Load stations that could get a state attribution initially and append those that got a state in the second round
	keep if _merge == 3
	drop _merge
	append using `manual_state'

	assert state != .
	
	
// Rename id 
	rename id id_data
	
//	Save Stations Data
	compress
	cd "$dta"
	save "stations.dta", replace
	
	
	
	
********************************************************************************
**	PRICE NOTIFICATIONS
********************************************************************************

*************************
//	Mehrtanken data
*************************
//	Load Data
	foreach i in 2013_01 2013_02 2013_03 2013_04 2013_05 2013_06 2013_07 2013_08 2013_09 {
		clear
		cd "$raw"
		insheet using "01_Mehr_Tanken/stations_`i'.csv"
		compress
		tempfile prices_`i'
		save `prices_`i''
		}

	use "`prices_2013_01'"
	foreach i in 2013_02 2013_03 2013_04 2013_05 2013_06 2013_07 2013_08 2013_09 {
		append using `prices_`i''
		}
	
//	Rename data
	
	rename v1 id_data
	rename v2 fuel_type
	rename v3 price
	rename v4 received

//	Trim data id data
	
	replace id_data = ltrim(id_data)
	
//	Keep if e5, e10 or diesel

	keep if fuel_type == 1 | fuel_type == 2 | fuel_type == 4
	
	replace fuel_type = 3 if fuel_type == 4		// 1 == e10; 2 == e5; 3 == diesel

	
//	Bring data in MTS-K format
		keep id_data received fuel_type price
		rename price price_
		encode id_data , g(en_id)
		drop id_data
		duplicates drop
		gcollapse (mean) price_ , by(en_id received fuel_type)
		reshape wide price_ , i(en_id received) j(fuel_type)

	//	Clean variable names, order and sort
		rename price_1 e10
		rename price_2 e5
		rename price_3 diesel
		decode en_id , g(id_data)
		drop en_id
		order id_data received e5 e10 diesel
		sort id_data received 

	
	//	Prepare data for merge with other price data
		
		keep id_data received e5 e10 diesel
	
		
	//	Save
			tempfile prices_mehrtanken
			save `prices_mehrtanken'
		

	
*************************			
//	Tanken tanken data
*************************			
	//	Load and append data
		foreach i in 2013_10_01 2013_10_02 2013_11 2013_12 2014_01 2014_02 2014_03 2014_04 2014_05_01 2014_05_02 2014_06 2014_07 2014_08_01 2014_08_02 2014_09 2014_10 2014_11 2014_12 {
			clear
			cd "$raw"
			insheet using "03_TankenTanken/prices_`i'.csv"
			tempfile prices_`i'
			save `prices_`i''
			}
		
		use "`prices_2013_10_01'"
		foreach i in 2013_10_02 2013_11 2013_12 2014_01 2014_02 2014_03 2014_04 2014_05_01 2014_05_02 2014_06 2014_07 2014_08_01 2014_08_02 2014_09 2014_10 2014_11 2014_12 {
			append using `prices_`i''
			}
		
	//	Drop obs identifier	
		drop v1
	
	//	Trim data id data
		replace v2 = ltrim(v2)	
		
	//  Destring
		destring v4, gen(e5) force
		label var e5 "Price of e5"
		drop v4
		
		destring v5, gen(e10) force
		label var e10 "Price of e10"
		drop v5
		
		destring v6, gen(diesel) force
		label var diesel "Price of diesel"
		drop v6
	
	// 	Rename
		rename v3 received
		rename v2 id_data
			
		
***************************			
//	Append Mehrtanken data
***************************	
	
	//	Append mehrtanken data
		append using `prices_mehrtanken'		
		
	//	Modify id_data
		
		drop if id_data == ""
		drop if id_data == "mts-de-1234567890123"
		drop if id_data	== "gui-test"	
		replace id_data = "3DE8A3CB-CE17-46A4-99C5-2C70A893FEC4" if id_data == "{3DE8A3CB-CE17-46A4-99C5-2C70A893FEC4} "
	
	//	Change ID
		*gen id_data_raw = id_data
		replace id_data = lower(id_data)
		replace id_data = trim(id_data)
		replace id_data = itrim(id_data)
		
	//	Order 	
		order id_data received e5 e10 diesel
	
		
	// 	Date and time
	
		gen received_date = substr(received,1,10)								
		gen date = date(received_date,"YMD")			
		format date  %td
		drop received_date
		
		gen double clock = clock(received,"YMDhms")					
		format clock  %tc

		gen received_time = substr(received,11,19)
		gen double time = clock(received_time,"hms")								
		format time  %tc
		drop received_time	
		
		drop received
		
	//	Generate month and year variable
		gen month = month(date)
		gen year = year(date)
	
	//	Generate hour and datetime information
		gen hour = hh(clock)
		order id_data date clock time hour
		
		gen double datehour=cofd(date)
		format datehour %tc
		replace datehour = datehour + hour *1000*60*60
			
	//	Drop duplicates: If there are two price messages at same time: Drop one of them
		duplicates drop id_data clock, force
	
	//	Give each observation an identifier where the data comes from
	
		//	Mehr Tanken
			gen source_mt = 0
			replace source_mt = 1 if date <= 19614 // September 12th, 2013
		
		//	Tanken Tanken Voluntary 
			gen source_ttv = 0
			replace source_ttv = 1 if  date > 19614 & date <19693 // September 13th, 2013
		
		//	Tanken Tanken Mandatory 
			gen source_ttm = 0
			replace source_ttm = 1 if  date >= 19693 &  date < 20120 
		
		
		//	 Generate source variable
			gen source = .
			replace source = 1 if source_mt == 1
			replace source = 2 if source_ttv == 1
			replace source = 3 if source_ttm == 1
			label def source 1 "Mehr tanken" 2 "Tanken tanken (voluntary)" 3 "Tanken tanken (mandatory)" , replace
			label val source source
			
	//	Generate date regarding price // Important to see how much time since last change
		foreach i in e5 e10 diesel {
			gen `i'_date = date if !missing(`i')
			bysort id_data (clock): replace `i'_date = `i'_date[_n-1] if missing(`i'_date)
			format `i'_date %td
			}
			
	//	Generate date differences
		foreach i in e5 e10 diesel {
			bysort id_data (clock): gen `i'_datedif = `i'_date - `i'_date[_n-1]
			}
				
			
	//	Drop implausibly high / low prices		
		foreach i in e5 e10 diesel {
			replace `i' = . if `i'< 0.899 | `i'> 1.99
			}		
	
		
		
	//	Order and sort
	
		order id_data date month year clock datehour hour time e5* e10* diesel*
		sort id_data clock
	
	//	Labeling
		
		foreach i in e5 e10 diesel {
			label var `i' "Price of `i'"
			}
	
	//	Label 
		label var id_data "Unique gasoline station ID"
		label var date  "Date"
		label var month  "Month"
		label var year  "Year"
		label var clock  "Time and date"
		label var datehour  "Hour and date"
		label var hour  "Hour"
		label var time  "Time"
		
		foreach i in e5 e10 diesel {
			label var `i' "Raw `i' price"
			label var `i'_date  "Date last price change (`i')"			
			}
			
		label var source_mt  "Mehr Tanken data"
		label var source_ttv  "Tanken tanken (voluntary) data "
		label var source_ttm  "Tanken tanken (mandatory) data"
	
	//	Save
		compress
		save "$dta\price_notifications.dta",replace
		
			
********************************************************************************
**	PRICE PANEL
********************************************************************************

//	Price panel for e5

//	Load Data
	use "$dta/price_notifications.dta", clear

//	Just keep variables we need
	keep id_data date clock e5 source time 
	
//	Also drop notifications with no price info
	drop if e5 == .
	
//	Drop duplicates
	duplicates drop id_data clock, force

//	Generate prices at 5am to 10pm

	foreach i in e5 {
	forval j=5(1)22 {
	bysort id_data date (clock): gen `i'_`j'oclock_help1 = cond(time[_n] < `j'*60*60*1000, `i'[_n], .)
		bysort id_data date (clock): replace `i'_`j'oclock_help1 = cond(id_data[_n] == id_data[_n+1] & time [_n+1] < `j'*60*60*1000,., `i'_`j'oclock_help1[_n])
		bysort id_data date: egen `i'_`j'oclock = sum(`i'_`j'oclock_help1)
		bysort id_data (clock): gen `i'_`j'oclock_help2 = cond(`i'_`j'oclock[_n] == 0 & id_data[_n] == id_data[_n-1] & date[_n] != date[_n-1] ,`i'[_n-1], .)
		bysort id_data date: replace `i'_`j'oclock = sum(`i'_`j'oclock_help2) if `i'_`j'oclock == 0
		replace `i'_`j'oclock = . if `i'_`j'oclock == 0 // There are missings if at the opening day there is a price notification after `j'oclock	
		drop `i'_`j'oclock_help*
		}
		}

// 	Check where the missings come from
	bysort id_data (clock): gen e5_opening = date[1]
	format e5_opening %td
	assert e5_5 !=. if date != e5_opening
	
// Generate variable with first price notification on given day
	bysort id_data date: egen first_notif_date = min(time)
	format first_notif_date  %tc
	order id_data date clock time first_notif_date
	
//	Now we just want one date price
	cap drop e5 clock time  
	duplicates drop id_data date, force

//	Tsset-data and create Panel
	
	egen id_data2=group(id_data)
	tsset id_data2 date
	tsfill, full																
	
//	Fill data
	bysort id_data2 (date): replace id_data = id_data[_n-1] if id_data[_n]==""
	
	gen minus_date = - date
	bysort id_data2 (minus_date): replace id_data = id_data[_n-1] if id_data[_n]==""
	drop minus_date

	
	replace source = 1 if date <= 19614 
	replace source = 2 if date > 19614 & date <19693
	replace source = 3 if date >= 19693 &  date < 20120


//	Carryforward prices
	** 	Carryforward rule: Carry prices forward for one day.
	
	gen date_positive = date if first_notif_date !=.
	bysort id_data (date): replace date_positive = date_positive[_n-1] if date_positive==.
	
	forval j=5(1)22 {
		gen e5_`j'oclock_panel = e5_`j'oclock
		}

	** Work with one-day carry forward
	bysort id_data (date): replace e5_5oclock_panel = e5_22oclock_panel[_n-1] if first_notif_date == . & date - date_positive == 1
	
	forval j=6(1)22 {
		loc k = `j' - 1
		replace e5_`j'oclock_panel = e5_`k'oclock_panel if first_notif_date == .
		}

	** Work with one-day carry forward
	forval j=5(1)22 {
		replace e5_`j'oclock_panel = . if first_notif_date != . & date - date_positive[_n-1] > 1 & first_notif_date > `j'*60*60*1000
		replace e5_`j'oclock = . if first_notif_date != . & first_notif_date > `j'*60*60*1000
		}

		
//	Generate day of the week variable
	gen dow = dow(date)
	label def dow 0 "Sunday" 1 "Monday" 2 "Tuesday" 3 "Wednesday" 4 "Thursday" 5 "Friday" 6 "Saturday", replace
	label val dow dow
	
	gen weekend = 0 
	replace weekend = 1 if dow == 6 | dow == 0
	label def weekend 0 "Weekday" 1 "Weekend"
	label val weekend weekend
	
//	Label
	label var source "Source"
	label var dow "Day of the week"
	label var weekend "1 if weekend"
	label var e5_opening "First active date"
	
	foreach i in e5 {
		forval j=5(1)22 {
			label var `i'_`j'oclock "Price at `j' (`i')"
			label var `i'_`j'oclock_panel "Carryforward price at `j' (`i') "
			}
			}
	
//	Save Panel
	drop id_data2 date_positive first_notif_date
	
	compress
	tempfile price_panel_e5
	save `price_panel_e5'
	
	
	
//	Price panel for diesel (gazole)

//	Load Data
	use "$dta/price_notifications.dta", clear

//	Just keep variables we need
	keep id_data date clock diesel source time 
	rename diesel gazole 
	
//	Also drop notifications with no price info
	drop if gazole == .
	
//	Drop duplicates
	duplicates drop id_data clock, force

//	Generate prices at 6pm

	foreach i in gazole {
	forval j=5(1)22 {
	bysort id_data date (clock): gen `i'_`j'oclock_help1 = cond(time[_n] < `j'*60*60*1000, `i'[_n], .)
		bysort id_data date (clock): replace `i'_`j'oclock_help1 = cond(id_data[_n] == id_data[_n+1] & time [_n+1] < `j'*60*60*1000,., `i'_`j'oclock_help1[_n])
		bysort id_data date: egen `i'_`j'oclock = sum(`i'_`j'oclock_help1)
		bysort id_data (clock): gen `i'_`j'oclock_help2 = cond(`i'_`j'oclock[_n] == 0 & id_data[_n] == id_data[_n-1] & date[_n] != date[_n-1] ,`i'[_n-1], .)
		bysort id_data date: replace `i'_`j'oclock = sum(`i'_`j'oclock_help2) if `i'_`j'oclock == 0
		replace `i'_`j'oclock = . if `i'_`j'oclock == 0 // There are missings if at the opening day there is a price notification after `j'oclock	
		drop `i'_`j'oclock_help*
		}
		}

// 	Check where the missings come from
	bysort id_data (clock): gen gazole_opening = date[1]
	format gazole_opening %td
	assert gazole_5 !=. if date != gazole_opening
	
	
// Generate variable with first price notification on given day
	bysort id_data date: egen first_notif_date = min(time)
	format first_notif_date  %tc
	order id_data date clock time first_notif_date
	
//	Now we just want one date price
	cap drop gazole clock time  
	duplicates drop id_data date, force

//	Tsset-data and create Panel
	
	egen id_data2=group(id_data)
	tsset id_data2 date
	tsfill, full																
	
//	Fill data
	bysort id_data2 (date): replace id_data = id_data[_n-1] if id_data[_n]==""
	
	gen minus_date = - date
	bysort id_data2 (minus_date): replace id_data = id_data[_n-1] if id_data[_n]==""
	drop minus_date

	
	replace source = 1 if date <= 19614 
	replace source = 2 if date > 19614 & date <19693
	replace source = 3 if date >= 19693 &  date < 20120


//	Carryforward prices
	** 	Carryforward rule: Carry prices forward for one day.
	
	gen date_positive = date if first_notif_date !=.
	bysort id_data (date): replace date_positive = date_positive[_n-1] if date_positive==.
	
	forval j=5(1)22 {
		gen gazole_`j'oclock_panel = gazole_`j'oclock
		}

	** Work with one-day carry forward
	bysort id_data (date): replace gazole_5oclock_panel = gazole_22oclock_panel[_n-1] if first_notif_date == . & date - date_positive == 1
	
	forval j=6(1)22 {
		loc k = `j' - 1
		replace gazole_`j'oclock_panel = gazole_`k'oclock_panel if first_notif_date == .
		}

	** Work with one-day carry forward
	forval j=5(1)22 {
		replace gazole_`j'oclock_panel = . if first_notif_date != . & date - date_positive[_n-1] > 1 & first_notif_date > `j'*60*60*1000
		replace gazole_`j'oclock = . if first_notif_date != . & first_notif_date > `j'*60*60*1000
		}

		
//	Generate day of the week variable
	gen dow = dow(date)
	label def dow 0 "Sunday" 1 "Monday" 2 "Tuesday" 3 "Wednesday" 4 "Thursday" 5 "Friday" 6 "Saturday", replace
	label val dow dow
	
	gen weekend = 0 
	replace weekend = 1 if dow == 6 | dow == 0
	label def weekend 0 "Weekday" 1 "Weekend"
	label val weekend weekend
	
//	Label
	label var source "Source"
	label var dow "Day of the week"
	label var weekend "1 if weekend"
	label var gazole_opening "First active date gazole"
	
	foreach i in gazole {
		forval j=5(1)22 {
			label var `i'_`j'oclock "Price at `j' (`i')"
			label var `i'_`j'oclock_panel "Carryforward price at `j' (`i') "
			}
			}
	
//	Save Panel
	drop id_data2 date_positive first_notif_date
	
//	Save combined panel
	merge 1:1 id_data date using `price_panel_e5'
	drop _merge
	save "$dta/price_panel", replace

	
	
****************************************
** Daily weighted average fuel prices **
****************************************

// DESCRIPTION
// This code creates a dataset (in wide format) with fueling weights for every hour of the day 

** Load data
	cd "$raw"
	import excel "02_Other_Datasets\Abbildung11_Bundestagsbericht_daten.xlsx", sheet("Tabelle1") firstrow clear

** Drop unnecessary variables
	drop C-consumer_share

** Gen weight variables
	sort Hourofday
	separate ofConsumersnormalizedon10, by(Hourofday) gen(w_)
	gen one=1
	gcollapse (sum) w_*

** Save
	cd "$dta"
	save "fueling_weights.dta", replace

	

// Calculate average weighted prices		
		
				** Load price data
					cd "$dta"
					use "price_panel", clear
		
				** Merge with data on consumer fueling patterns
					cd "$dta"
					merge 1:1 _n using "fueling_weights.dta"

				** Fill in missing weights
					gen one=1
					forvalues j = 1(1)24 {
						foreach x in w_`j' {
							bysort one: replace `x' = 0 if `x'==.
							bysort one: gegen `x'_m = max(`x') 
							replace `x' = `x'_m
							drop `x'_m
						}
					}
					cap drop _merge one

				** Recalculate weights from 5am until 10pm
					foreach i in e5 gazole {
					forvalues j = 5(1)22 {	
						gen w_`j'_`i' = w_`j'
						replace w_`j'_`i' = . if `i'_`j'oclock_panel==.					// Account for missing prices
					}
					
					egen tot_`i' = rowtotal(w_5_`i' - w_22_`i')
					forvalues j = 5(1)22 {
						gen wt_`j'_`i' = w_`j'_`i'/tot_`i' 
					}
					

				** Weight hourly prices with consumer fueling patterns and calculate weighted average
					
					forvalues j = 5(1)22 {
						gen w_price_`i'_`j'= `i'_`j'oclock_panel * wt_`j'_`i'
					}
					egen wavg_price_`i' = rowtotal(w_price_`i'_5 - w_price_`i'_22)
					
				** Calculate daily station-level average (unweighted) fuel price
					egen avg_price_`i' = rowmean(`i'_5oclock_panel-`i'_22oclock_panel)
					}
				
				** Drop redundant variables
					drop w_* wt* w_price* tot* 
					
				** Replace weighted avg. price to missing (currently 0 instead)
					replace wavg_price_e5=. if avg_price_e5==.
					replace wavg_price_gazole=. if avg_price_gazole==.
					

				** Save data set
					keep id_data date source wavg_price_e5 wavg_price_gazole avg_price_e5 avg_price_gazole 
					compress
					cd "$dta"
					save "price_panel_wprice", replace
					
	
